Built on an AWS EC2 instance with AWS S3 buckets, using the TensorFlow open-source Python library.
This project uses TensorFlow to create a Generative Adversarial Network trained on three datasets: one of handwritten digits, one of faces, and one of memes.
A Generative Adversarial Network is a combination of two neural networks: a discriminator and a generator. The generator is fed an array of random numbers as input and returns an image. The discriminator takes this generated image, along with images drawn from the real dataset, and predicts whether each image came from the dataset (real) or was generated (fake). Both networks are updated and trained with a loss function to produce the intended results.
data_dir = './data'
import helper
helper.download_extract('mnist', data_dir)
helper.download_extract('celeba', data_dir)
# For the meme dataset, I compiled a 18,780 image long directory on my personal computer.
show_n_images = 25
%matplotlib inline
import os
from glob import glob
from matplotlib import pyplot
from PIL import Image
mnist_images = helper.get_batch(glob(os.path.join(data_dir, 'mnist/*.jpg'))[:show_n_images], 28, 28, 'L')
pyplot.imshow(helper.images_square_grid(mnist_images, 'L'), cmap='gray')
show_n_images = 25
mnist_images = helper.get_batch(glob(os.path.join(data_dir, 'memes/*.jpg'))[:show_n_images], 28, 28, 'RGB')
pyplot.imshow(helper.images_square_grid(mnist_images, 'RGB'))
show_n_images = 25
mnist_images = helper.get_batch(glob(os.path.join(data_dir, 'img_align_celeba/*.jpg'))[:show_n_images], 28, 28, 'RGB')
pyplot.imshow(helper.images_square_grid(mnist_images, 'RGB'))
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Verify that the installed TensorFlow is new enough for this notebook.
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), 'Please use TensorFlow version 1.0 or newer. You are using {}'.format(tf.__version__)
print('TensorFlow Version: {}'.format(tf.__version__))

# Warn when no GPU is visible; training on CPU is very slow.
gpu_name = tf.test.gpu_device_name()
if gpu_name:
    print('Default GPU Device: {}'.format(gpu_name))
else:
    warnings.warn('No GPU found. Please use a GPU to train your neural network.')
def model_inputs(image_width, image_height, image_channels, z_dim):
    """
    Create the model inputs.

    :param image_width: The input image width
    :param image_height: The input image height
    :param image_channels: The number of image channels
    :param z_dim: The dimension of Z
    :return: Tuple of (tensor of real input images, tensor of z data, learning rate)
    """
    # NHWC batch of real images; batch size left unconstrained.
    real_shape = (None, image_width, image_height, image_channels)
    inputs_real = tf.placeholder(tf.float32, real_shape, name="input_real")
    inputs_z = tf.placeholder(tf.float32, (None, z_dim), name="input_z")
    lr = tf.placeholder(tf.float32, (None), name="learning_rate")
    return inputs_real, inputs_z, lr
print("Success")
def discriminator(images, reuse=False):
    """
    Create the discriminator network.

    :param images: Tensor of input image(s) — assumes 28x28 NHWC; TODO confirm
    :param reuse: Boolean if the weights should be reused
    :return: Tuple of (tensor output of the discriminator, tensor logits of the discriminator)
    """
    alpha = 0.2  # leaky-ReLU negative slope
    with tf.variable_scope('discriminator', reuse=reuse):
        # 28x28 -> 14x14
        x = tf.layers.conv2d(images, 28, 4, strides=2, padding='same')
        x = tf.layers.batch_normalization(x, training=True)
        x = tf.maximum(alpha * x, x)
        # BUG FIX: the original fed `images` (not `x`) into the 2nd and 3rd
        # conv layers, discarding the earlier layers' outputs and leaving a
        # 14x14x256 tensor that the (-1, 7*7*256) reshape silently split
        # across the batch dimension.
        # 14x14 -> 7x7
        x = tf.layers.conv2d(x, 128, 4, strides=2, padding='same')
        x = tf.layers.batch_normalization(x, training=True)
        x = tf.maximum(alpha * x, x)
        # 7x7 -> 4x4? no: strides=2 on 7x7 with 'same' gives 4x4 — keep the
        # original flatten size, which now matches because the chain is fixed.
        x = tf.layers.conv2d(x, 256, 4, strides=2, padding='same')
        x = tf.layers.batch_normalization(x, training=True)
        x = tf.maximum(alpha * x, x)
        # Flatten and project to a single real/fake logit per image.
        flat = tf.reshape(x, (-1, 4 * 4 * 256))
        logits = tf.layers.dense(flat, 1)
        output = tf.sigmoid(logits)
        return output, logits
print("Success")
def generator(z, out_channel_dim, is_train=True):
    """
    Create the generator network.

    :param z: Input z
    :param out_channel_dim: The number of channels in the output image
    :param is_train: Boolean if generator is being used for training
    :return: The tensor output of the generator
    """
    alpha = 0.2  # leaky-ReLU negative slope

    def lrelu(t):
        return tf.maximum(alpha * t, t)

    # Reuse weights when sampling (is_train=False) so the training and
    # inference graphs share the same variables.
    with tf.variable_scope('generator', reuse=not is_train):
        # Project z and reshape into a 4x4x512 feature map.
        h = tf.layers.dense(z, 4 * 4 * 512)
        h = tf.reshape(h, (-1, 4, 4, 512))
        h = lrelu(tf.layers.batch_normalization(h, training=is_train))
        # 4x4 -> 7x7 (kernel 4, stride 1, 'valid' padding)
        h = tf.layers.conv2d_transpose(h, 256, 4, strides=1, padding='valid')
        h = lrelu(tf.layers.batch_normalization(h, training=is_train))
        # 7x7 -> 14x14
        h = tf.layers.conv2d_transpose(h, 128, 4, strides=2, padding='same')
        h = lrelu(tf.layers.batch_normalization(h, training=is_train))
        # 14x14 -> 28x28; tanh squashes to [-1, 1] to match scaled real images.
        logits = tf.layers.conv2d_transpose(h, out_channel_dim, 5, strides=2, padding='same')
        return tf.tanh(logits)
print("Success")
def model_loss(input_real, input_z, out_channel_dim):
    """
    Get the loss for the discriminator and generator.

    :param input_real: Images from the real dataset
    :param input_z: Z input
    :param out_channel_dim: The number of channels in the output image
    :return: A tuple of (discriminator loss, generator loss)
    """
    smooth = 0.9  # one-sided label smoothing applied to the "real" labels

    def ce(logits, labels):
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))

    fake_images = generator(input_z, out_channel_dim)
    d_out_real, d_logits_real = discriminator(input_real)
    d_out_fake, d_logits_fake = discriminator(fake_images, reuse=True)

    # Discriminator: push real logits toward (smoothed) 1, fake toward 0.
    d_loss = (ce(d_logits_real, tf.ones_like(d_out_real) * smooth)
              + ce(d_logits_fake, tf.zeros_like(d_out_fake)))
    # Generator: fool the discriminator into labeling fakes as real.
    g_loss = ce(d_logits_fake, tf.ones_like(d_out_fake))
    return d_loss, g_loss
print("Success")
def model_opt(d_loss, g_loss, learning_rate, beta1):
    """
    Get optimization operations.

    :param d_loss: Discriminator loss Tensor
    :param g_loss: Generator loss Tensor
    :param learning_rate: Learning Rate Placeholder
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :return: A tuple of (discriminator training operation, generator training operation)
    """
    # Split trainable variables by the variable scope each network was built in,
    # so each optimizer only updates its own network.
    trainables = tf.trainable_variables()
    d_vars = [v for v in trainables if v.name.startswith('discriminator')]
    g_vars = [v for v in trainables if v.name.startswith('generator')]

    # Run batch-norm moving-average updates before each optimizer step.
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        def adam_step(loss, var_list):
            return tf.train.AdamOptimizer(learning_rate, beta1=beta1).minimize(
                loss, var_list=var_list)
        d_train_opt = adam_step(d_loss, d_vars)
        g_train_opt = adam_step(g_loss, g_vars)
    return d_train_opt, g_train_opt
print("Success")
import numpy as np


def show_generator_output(sess, n_images, input_z, out_channel_dim, image_mode):
    """
    Show example output for the generator.

    :param sess: TensorFlow session
    :param n_images: Number of Images to display
    :param input_z: Input Z Tensor
    :param out_channel_dim: The number of channels in the output image
    :param image_mode: The mode to use for images ("RGB" or "L")
    """
    z_dim = input_z.get_shape().as_list()[-1]
    sample_z = np.random.uniform(-1, 1, size=[n_images, z_dim])
    # NOTE(review): this rebuilds generator ops (reuse via is_train=False) on
    # every call, so the graph grows slowly over training — harmless here, but
    # worth hoisting if calls become frequent.
    samples = sess.run(generator(input_z, out_channel_dim, False),
                       feed_dict={input_z: sample_z})
    grid = helper.images_square_grid(samples, image_mode)
    pyplot.imshow(grid, cmap=None if image_mode == 'RGB' else 'gray')
    pyplot.show()
def train(epoch_count, batch_size, z_dim, learning_rate, beta1, get_batches, data_shape, data_image_mode):
    """
    Train the GAN, printing losses every 100 steps and sample images every 200.

    :param epoch_count: Number of epochs
    :param batch_size: Batch size
    :param z_dim: The dimension of Z
    :param learning_rate: Learning rate fed to both optimizers
    :param beta1: The exponential decay rate for the 1st moment in the optimizer
    :param get_batches: Function that yields batches of images
    :param data_shape: Shape of the data (n_samples, width, height, channels)
    :param data_image_mode: The image mode ("RGB" or "L")
    """
    # Build the graph: placeholders, losses, and per-network optimizers.
    input_real, input_z, lr = model_inputs(*data_shape[1:], z_dim)
    d_loss, g_loss = model_loss(input_real, input_z, data_shape[3])
    d_opt, g_opt = model_opt(d_loss, g_loss, lr, beta1)

    steps = 0
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch_i in range(epoch_count):
            for batch_images in get_batches(batch_size):
                steps += 1
                # Rescale batch to [-1, 1] to match the generator's tanh output
                # (helper's get_batches appears to yield [-0.5, 0.5] — confirm).
                batch_images = batch_images * 2
                batch_z = np.random.uniform(-1, 1, size=(batch_size, z_dim))
                # Alternate one discriminator step and one generator step.
                sess.run(d_opt, feed_dict={input_real: batch_images, input_z: batch_z, lr: learning_rate})
                sess.run(g_opt, feed_dict={input_z: batch_z, lr: learning_rate, input_real: batch_images})
                if steps % 100 == 0:
                    train_loss_d = d_loss.eval({input_z: batch_z, input_real: batch_images})
                    train_loss_g = g_loss.eval({input_z: batch_z})
                    # BUG FIX: the original formatted the epoch total with the
                    # module-level global `epochs` instead of the `epoch_count`
                    # parameter, printing wrong totals whenever they differ.
                    print("Epoch {}/{}...".format(epoch_i + 1, epoch_count),
                          "Discriminator Loss: {:.4f}...".format(train_loss_d),
                          "Generator Loss: {:.4f}".format(train_loss_g),
                          "Step: {:.4f}".format(steps))
                if steps % 200 == 0:
                    show_generator_output(sess, 25, input_z, data_shape[3], data_image_mode)
# MNIST hyperparameters, chosen after a small manual search
# (learning rates tried: 0.01, 0.005, 0.02; beta1 tried: 0.3-0.6).
batch_size = 50
z_dim = 50
learning_rate = 0.005
beta1 = 0.4

tf.reset_default_graph()
epochs = 2
mnist_dataset = helper.Dataset('mnist', glob(os.path.join(data_dir, 'mnist/*.jpg')))
with tf.Graph().as_default():
    train(epochs, batch_size, z_dim, learning_rate, beta1, mnist_dataset.get_batches,
          mnist_dataset.shape, mnist_dataset.image_mode)
Training took roughly 20 minutes per epoch with this configuration.
# CelebA hyperparameters: smaller batches, larger z, gentler learning rate.
batch_size = 32
z_dim = 128
learning_rate = 0.0003
beta1 = 0.2
epochs = 2

celeba_dataset = helper.Dataset('celeba', glob(os.path.join(data_dir, 'img_align_celeba/*.jpg')))
with tf.Graph().as_default():
    train(epochs, batch_size, z_dim, learning_rate, beta1, celeba_dataset.get_batches,
          celeba_dataset.shape, celeba_dataset.image_mode)
# Meme-dataset hyperparameters: same shape as CelebA but a higher learning
# rate and more epochs for the noisier data.
batch_size = 32
z_dim = 128
learning_rate = 0.003
beta1 = 0.2
epochs = 5

memes_dataset = helper.Dataset('memes', glob(os.path.join(data_dir, 'memes/*.jpg')))
with tf.Graph().as_default():
    train(epochs, batch_size, z_dim, learning_rate, beta1, memes_dataset.get_batches,
          memes_dataset.shape, memes_dataset.image_mode)
The numbers (MNIST) dataset worked best for generating new, similar images, because the training data is so uniform. The celebrity faces behaved similarly, but the meme dataset had such a wide variety of content and distinct features that it produced far more abstract images.